#importing the libraries
library(scatterplot3d) # part(a)
library(tidyverse) #filter etc.
library(ggplot2) #plot in reduced dimensions
library(reshape) #melt
library(dplyr)
library(DT) #datatable
Given the uWaveGestureLibrary data set, which consists of over 4000 instances of 8 different gestures performed by 8 people, our aim is to visualize the gestures. Then, we will try to apply dimensionality reduction. Every instance belongs to one of 8 gesture classes and is recorded along three axes: x, y and z.
# Reading the data from the repository.
# One file per axis; column V1 of each file is the gesture class label (it is
# what the class lookup and the plot titles further down read).
data_url <- "https://github.com/BU-IE-582/fall20-ilaydacelenk/blob/master/files/HW2_data/"

# Download one uWaveGestureLibrary file.
#   axis:  "X", "Y" or "Z"
#   split: "TRAIN" (default) or "TEST"
# Returns the data frame produced by read.table().
read_uwave <- function(axis, split = "TRAIN") {
  file_name <- paste0("uWaveGestureLibrary_", axis, "_", split, ".txt?raw=true")
  read.table(paste0(data_url, file_name))
}

x_train <- read_uwave("X")
y_train <- read_uwave("Y")
z_train <- read_uwave("Z")

# The test splits are not used below; uncomment to load them.
# x_test <- read_uwave("X", "TEST")
# y_test <- read_uwave("Y", "TEST")
# z_test <- read_uwave("Z", "TEST")
For the x-axis, let’s find the indices where we see each gesture for the first time. It is enough to look at x_train since y_train and z_train will have the same indices for the first occurrences of each gesture.
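In order to find the velocity vector, we need to compute the cumulative sum of acceleration over time. Since we have the indices, it is easy to sum the acceleration values of those instances over time; taking the cumulative sum of the velocity once more gives the position.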
# Row index of the first instance of each gesture class 1..8.
# match() is vectorised over its first argument and returns, for every class,
# the position of its first occurrence in V1 (NA if the class never appears).
index_vector <- match(1:8, x_train$V1)
# Turn an acceleration table into a position table by integrating twice:
#   velocity(t) = cumsum(acceleration) * dt
#   position(t) = cumsum(velocity) * dt
#
# acc_matrix: data frame with one row per instance; column V1 holds the class
#             label and every other column is an acceleration reading, in
#             time order.
# dt:         time step between consecutive readings. The default of 1
#             reproduces plain cumulative sums.
# Returns a data frame with the same column names as the input, where V1 is
# the original class label and the remaining columns hold the positions.
loc_matrix <- function(acc_matrix, dt = 1) {
  class_labels <- acc_matrix$V1
  # Zero the label column so it acts as a zero starting value instead of
  # leaking the class id into the running sums.
  acc_matrix$V1 <- 0
  acc <- as.matrix(acc_matrix)
  # apply() over rows returns one *column* per input row, hence the transpose.
  velocity <- t(apply(acc, 1, cumsum)) * dt
  position <- t(apply(velocity, 1, cumsum)) * dt
  loc <- as.data.frame(position)
  loc$V1 <- class_labels
  loc
}
# Position matrices for the three axes: one row per instance, column 1 is the
# class label and the remaining columns are the positions over time.
x_position <- as.matrix(loc_matrix(x_train))
y_position <- as.matrix(loc_matrix(y_train))
z_position <- as.matrix(loc_matrix(z_train))
#par(mfrow=c(1,1))
# Draw one 3D scatter plot of the position trace for each requested instance.
#
# indices: row numbers into x_position / y_position / z_position (read from
#          the enclosing environment). NA entries are skipped.
# Column 1 of the position matrices is the class label; it is dropped from
# the plotted coordinates and shown in the plot title instead.
# Called for its side effect (the plots); returns NULL invisibly.
plot_3d <- function(indices) {
  # Use the argument rather than the global index_vector and a fixed 1:8, so
  # the function plots exactly the rows it is given.
  for (row in indices[!is.na(indices)]) {
    scatterplot3d(
      x_position[row, -1],
      y_position[row, -1],
      z_position[row, -1],
      main = paste("Gesture Class ", x_position[row, 1]),
      xlab = "X Axis",
      ylab = "Y Axis",
      zlab = "Z Axis",
      col.grid = "lightblue",
      type = "p",
      color = "red"
    )
  }
  invisible(NULL)
}
plot_3d(index_vector)
Here we are dealing with multivariate time series and we would like to reduce it to a univariate time series. In order to achieve this, we transform the data into the long format first.
Column V2 corresponds to time index 1 and column V316 corresponds to time index 315.
# Reshape one position matrix from wide format (one row per instance, one
# column per time step) to long format (one row per instance and time step).
#   position:   matrix whose column V1 is the class label
#   value_name: name to give the column holding the melted values
# Returns a data frame with columns time_series_id, time_index, <value_name>
# and class. time_index still carries the original column name (e.g. "V2").
position_to_long <- function(position, value_name) {
  long <- position %>%
    as.data.frame() %>%
    mutate(id = seq_len(nrow(position))) %>%
    melt(id.vars = c("id", "V1")) %>%
    transmute(time_series_id = id, time_index = variable, value = value, class = V1)
  names(long)[names(long) == "value"] <- value_name
  long
}

# The three axes are combined by row position below, so they must describe
# the same instances and time steps.
stopifnot(
  identical(dim(x_position), dim(y_position)),
  identical(dim(x_position), dim(z_position))
)

x_long <- position_to_long(x_position, "X")
y_long <- position_to_long(y_position, "Y")
z_long <- position_to_long(z_position, "Z")

stopifnot(
  identical(x_long$time_series_id, y_long$time_series_id),
  identical(x_long$time_series_id, z_long$time_series_id),
  identical(as.character(x_long$time_index), as.character(y_long$time_index)),
  identical(as.character(x_long$time_index), as.character(z_long$time_index))
)

# Strip the leading "V" from the column name and shift by one so that column
# V2 becomes time index 1 (V1 was the class label, not a reading).
xyz_long <- x_long %>%
  select(-class) %>%
  mutate(Y = y_long$Y, Z = z_long$Z, class = x_long$class) %>%
  mutate(time_index = as.numeric(sub("^V", "", time_index)) - 1)
Then, we apply PCA on X, Y and Z. This will be applied to the whole data. According to the PCA results, the standard deviation of the first component is approximately 1.27 (a variance of about 1.62) and it is the linear combination `0.209*X + 0.723*Y + 0.658*Z`. Looking at the proportions, Comp.1 covers 54% of the variance, Comp.2 covers 36% and Comp.3 covers 10%, and together they add up to 100% of the variance. Since we would like to turn the data into a univariate time series, we will only keep Comp.1, which covers the highest percentage of the variance.
# PCA over the X, Y and Z positions of every observation in the data.
# cor = TRUE bases the decomposition on the correlation matrix, so scale
# differences between the axes do not dominate the components.
# Columns are picked by name so a change in column order cannot silently feed
# the wrong variables into the PCA.
pca <- princomp(xyz_long[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.2724995 1.0380930 0.5505524
FALSE Proportion of Variance 0.5397517 0.3592124 0.1010360
FALSE Cumulative Proportion 0.5397517 0.8989640 1.0000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.209 0.908 0.362
FALSE Y 0.723 0.105 -0.683
FALSE Z 0.658 -0.404 0.635
# Reduce (X, Y, Z) to one value per observation using the weights of the
# first principal component. The weights are read from the fitted `pca`
# object rather than retyped from its printed summary, so they keep full
# precision and stay correct if the data or the fit changes.
# NOTE(review): the weights are applied to the raw X/Y/Z values, as in the
# original formula, not to the standardised variables princomp(cor = TRUE)
# uses internally - confirm this is intended.
pc1 <- pca$loadings[, 1]
xyz_long_uni <- xyz_long %>%
  mutate(value = pc1[["X"]] * X + pc1[["Y"]] * Y + pc1[["Z"]] * Z) %>%
  select(-X, -Y, -Z)

# Keep only the series with the given ids; the id becomes a factor so ggplot
# assigns one discrete colour per series.
pick_series <- function(series, ids) {
  series %>%
    filter(time_series_id %in% ids) %>%
    mutate(time_series_id = as_factor(time_series_id))
}

# Line plot of the selected series against time, one colour per series.
plot_series_pair <- function(series, class_label) {
  ggplot(series) +
    geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
    xlab("Time Index") +
    ylab("Value") +
    theme(plot.title = element_text(hjust = 0.5)) +
    ggtitle(paste("2 Time Series from Class", class_label))
}

# Two example series per class (ids chosen by hand).
ex_class1 <- pick_series(xyz_long_uni, c(11, 17))
ex_class2 <- pick_series(xyz_long_uni, c(15, 20))
ex_class3 <- pick_series(xyz_long_uni, c(4, 13))
ex_class4 <- pick_series(xyz_long_uni, c(5, 8))
ex_class5 <- pick_series(xyz_long_uni, c(2, 3))
ex_class6 <- pick_series(xyz_long_uni, c(1, 10))
ex_class7 <- pick_series(xyz_long_uni, c(7, 12))
ex_class8 <- pick_series(xyz_long_uni, c(6, 21))

# Each call stays at top level so the returned plot is printed.
plot_series_pair(ex_class1, 1)
plot_series_pair(ex_class2, 2)
plot_series_pair(ex_class3, 3)
plot_series_pair(ex_class4, 4)
plot_series_pair(ex_class5, 5)
plot_series_pair(ex_class6, 6)
plot_series_pair(ex_class7, 7)
plot_series_pair(ex_class8, 8)
# PCA restricted to the observations of gesture class 1.
xyz_long_1 <- xyz_long %>%
  filter(class == 1)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca1 <- princomp(xyz_long_1[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca1, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.2932492 1.0091473 0.5559931
FALSE Proportion of Variance 0.5574978 0.3394594 0.1030428
FALSE Cumulative Proportion 0.5574978 0.8969572 1.0000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.119 0.973 0.198
FALSE Y 0.693 -0.224 0.685
FALSE Z 0.711 -0.701
# Univariate series for class 1, weighted by the first component of pca1.
# The weights come from the fitted object instead of being retyped from its
# printed summary, which avoids rounding and transcription errors.
pc1_class1 <- pca1$loadings[, 1]
xyz_long_uni_1 <- xyz_long_1 %>%
  mutate(
    value = pc1_class1[["X"]] * X + pc1_class1[["Y"]] * Y + pc1_class1[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class1_1 <- xyz_long_uni_1 %>%
  filter(time_series_id %in% c(11, 17)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class1_1) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 1")
# PCA restricted to the observations of gesture class 2.
xyz_long_2 <- xyz_long %>%
  filter(class == 2)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca2 <- princomp(xyz_long_2[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca2, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.1286537 0.9457021 0.9120243
FALSE Proportion of Variance 0.4246197 0.2981175 0.2772628
FALSE Cumulative Proportion 0.4246197 0.7227372 1.0000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.535 0.814 0.224
FALSE Y 0.613 -0.192 -0.767
FALSE Z 0.581 -0.548 0.602
# Univariate series for class 2, weighted by the first component of pca2.
# The weights come from the fitted object instead of being retyped from its
# printed summary, which avoids rounding and transcription errors.
pc1_class2 <- pca2$loadings[, 1]
xyz_long_uni_2 <- xyz_long_2 %>%
  mutate(
    value = pc1_class2[["X"]] * X + pc1_class2[["Y"]] * Y + pc1_class2[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class2_2 <- xyz_long_uni_2 %>%
  filter(time_series_id %in% c(15, 20)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class2_2) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 2")
# PCA restricted to the observations of gesture class 3.
xyz_long_3 <- xyz_long %>%
  filter(class == 3)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca3 <- princomp(xyz_long_3[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca3, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.2156489 1.0027730 0.7187794
FALSE Proportion of Variance 0.4926008 0.3351846 0.1722146
FALSE Cumulative Proportion 0.4926008 0.8277854 1.0000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.709 0.705
FALSE Y 0.371 0.847 -0.380
FALSE Z -0.599 0.532 0.599
# Univariate series for class 3, weighted by the first component of pca3.
# The weights come from the fitted object instead of being retyped from its
# printed summary, which avoids rounding and transcription errors.
pc1_class3 <- pca3$loadings[, 1]
xyz_long_uni_3 <- xyz_long_3 %>%
  mutate(
    value = pc1_class3[["X"]] * X + pc1_class3[["Y"]] * Y + pc1_class3[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class3_3 <- xyz_long_uni_3 %>%
  filter(time_series_id %in% c(4, 13)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class3_3) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 3")
# PCA restricted to the observations of gesture class 4.
xyz_long_4 <- xyz_long %>%
  filter(class == 4)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca4 <- princomp(xyz_long_4[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca4, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.2855781 0.9914895 0.6035210
FALSE Proportion of Variance 0.5509037 0.3276838 0.1214125
FALSE Cumulative Proportion 0.5509037 0.8785875 1.0000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.702 0.711
FALSE Y 0.674 0.285 -0.681
FALSE Z -0.230 0.958 0.173
# Univariate series for class 4, weighted by the first component of pca4.
# The hand-copied formula used 0.679 for Y although the fitted loading prints
# as 0.674; reading the weights from the fitted object removes that
# transcription error along with the rounding.
pc1_class4 <- pca4$loadings[, 1]
xyz_long_uni_4 <- xyz_long_4 %>%
  mutate(
    value = pc1_class4[["X"]] * X + pc1_class4[["Y"]] * Y + pc1_class4[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class4_4 <- xyz_long_uni_4 %>%
  filter(time_series_id %in% c(5, 8)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class4_4) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 4")
# PCA restricted to the observations of gesture class 5.
xyz_long_5 <- xyz_long %>%
  filter(class == 5)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca5 <- princomp(xyz_long_5[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca5, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.4038307 0.9685920 0.30180949
FALSE Proportion of Variance 0.6569135 0.3127235 0.03036299
FALSE Cumulative Proportion 0.6569135 0.9696370 1.00000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.245 0.970
FALSE Y 0.685 -0.177 0.707
FALSE Z 0.686 -0.170 -0.708
# Univariate series for class 5, weighted by the first component of pca5.
# The weights come from the fitted object instead of being retyped from its
# printed summary, which avoids rounding and transcription errors.
pc1_class5 <- pca5$loadings[, 1]
xyz_long_uni_5 <- xyz_long_5 %>%
  mutate(
    value = pc1_class5[["X"]] * X + pc1_class5[["Y"]] * Y + pc1_class5[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class5_5 <- xyz_long_uni_5 %>%
  filter(time_series_id %in% c(2, 3)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class5_5) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 5")
# PCA restricted to the observations of gesture class 6.
xyz_long_6 <- xyz_long %>%
  filter(class == 6)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca6 <- princomp(xyz_long_6[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca6, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.3400570 0.9638120 0.52470332
FALSE Proportion of Variance 0.5985843 0.3096445 0.09177119
FALSE Cumulative Proportion 0.5985843 0.9082288 1.00000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.291 0.955
FALSE Y -0.671 0.251 -0.698
FALSE Z -0.682 0.162 0.713
# Univariate series for class 6, weighted by the first component of pca6.
# The weights come from the fitted object instead of being retyped from its
# printed summary, which avoids rounding and transcription errors.
pc1_class6 <- pca6$loadings[, 1]
xyz_long_uni_6 <- xyz_long_6 %>%
  mutate(
    value = pc1_class6[["X"]] * X + pc1_class6[["Y"]] * Y + pc1_class6[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class6_6 <- xyz_long_uni_6 %>%
  filter(time_series_id %in% c(1, 10)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class6_6) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 6")
# PCA restricted to the observations of gesture class 7.
xyz_long_7 <- xyz_long %>%
  filter(class == 7)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca7 <- princomp(xyz_long_7[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca7, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.2971922 0.8883869 0.7266781
FALSE Proportion of Variance 0.5609026 0.2630771 0.1760203
FALSE Cumulative Proportion 0.5609026 0.8239797 1.0000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.596 0.487 0.639
FALSE Y 0.632 0.207 -0.747
FALSE Z 0.496 -0.849 0.184
# Univariate series for class 7, weighted by the first component of pca7.
# The weights come from the fitted object instead of being retyped from its
# printed summary, which avoids rounding and transcription errors.
pc1_class7 <- pca7$loadings[, 1]
xyz_long_uni_7 <- xyz_long_7 %>%
  mutate(
    value = pc1_class7[["X"]] * X + pc1_class7[["Y"]] * Y + pc1_class7[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class7_7 <- xyz_long_uni_7 %>%
  filter(time_series_id %in% c(7, 12)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class7_7) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 7")
# PCA restricted to the observations of gesture class 8.
xyz_long_8 <- xyz_long %>%
  filter(class == 8)
# cor = TRUE: decompose the correlation matrix so scale differences between
# the axes do not dominate. Columns are selected by name, not by position.
pca8 <- princomp(xyz_long_8[, c("X", "Y", "Z")], cor = TRUE)
print(summary(pca8, loadings = TRUE))
FALSE Importance of components:
FALSE Comp.1 Comp.2 Comp.3
FALSE Standard deviation 1.2203138 0.9683089 0.7571078
FALSE Proportion of Variance 0.4963886 0.3125407 0.1910707
FALSE Cumulative Proportion 0.4963886 0.8089293 1.0000000
FALSE
FALSE Loadings:
FALSE Comp.1 Comp.2 Comp.3
FALSE X 0.664 0.253 0.704
FALSE Y 0.337 -0.941
FALSE Z 0.668 0.223 -0.710
# Univariate series for class 8, weighted by the first component of pca8.
# The weights come from the fitted object instead of being retyped from its
# printed summary, which avoids rounding and transcription errors.
pc1_class8 <- pca8$loadings[, 1]
xyz_long_uni_8 <- xyz_long_8 %>%
  mutate(
    value = pc1_class8[["X"]] * X + pc1_class8[["Y"]] * Y + pc1_class8[["Z"]] * Z
  ) %>%
  select(-X, -Y, -Z)
# Same two example series as in the overall plot for this class.
ex_class8_8 <- xyz_long_uni_8 %>%
  filter(time_series_id %in% c(6, 21)) %>%
  mutate(time_series_id = as_factor(time_series_id))
ggplot(ex_class8_8) +
  geom_line(aes(x = time_index, y = value, colour = time_series_id)) +
  xlab("Time Index") +
  ylab("Value") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("2 Time Series from Class 8")
[1] J. Liu, Z. Wang, L. Zhong, J. Wickramasuriya, and V. Vasudevan. uWave: Accelerometer-based personalized gesture recognition and its applications. Pervasive Computing and Communications, IEEE International Conference on, 0:1-9, 2009. (link: https://www.recg.org/publications/liu09percom.pdf)